#Helpful link on clustering analysis from Whitney:
# https://jcoliver.github.io/learn-r/008-ggplot-dendrograms-and-heatmaps.html
# Read in the data from Google Sheets
# All case-study workbooks share the same layout: responses live on the
# "UPDATED_S4_ResilienceAttributes" sheet in cells A6:P61 and are read as
# character ("c") so that literal "NA" entries survive import intact.
#
# Args:
#   ss:         Google Sheets URL for the case-study workbook.
#   case_study: short case-study label (e.g. "Aguion") added as a column.
#   reference:  citation key (e.g. "aguion1") added as a column.
# Returns: a tibble with janitor-cleaned (snake_case) column names and
#   `case_study` / `reference` columns inserted after `question_id`.
read_case_study <- function(ss, case_study, reference) {
  read_sheet(ss = ss,
             sheet = "UPDATED_S4_ResilienceAttributes",
             range = "A6:P61",
             col_names = TRUE,
             na = "",
             guess_max = 10000,
             trim_ws = TRUE,
             col_types = "c") %>%
    as_tibble() %>%
    clean_names() %>%
    add_column(case_study = case_study,
               reference = reference,
               .after = "question_id")
}

aguion_barnacle <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1WfGsKOT0gFSN1yHUObDNSQflqd5avodSuf3kOZLTyLk/edit#gid=1947230832",
  case_study = "Aguion", reference = "aguion1")
dickey_collas_eur <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1hfXTM5hwe0iLWQQmrc31-xruOV3SUixuxqRSuqC8ydA/edit#gid=1707192308",
  case_study = "Dickey_Collas", reference = "dickey_collas1")
eurich_clam <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1ZEIev1lcoD6Lxu3otzDCfbJJSiPJoZeyYbTvBXb_S9U/edit#gid=1453404713",
  case_study = "Eurich", reference = "eurich1")
free_crab <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/12zMb5el0QAB-CdZdmdKjWw5Krbwx7farqlUvbN08HEg/edit#gid=390317686",
  case_study = "Free", reference = "free1")
friedman_fiji <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/13fgkP1xHZB6gZqXUBHuylaC3ImCDIDTFFjqQ8RAoV-s/edit#gid=476628697",
  case_study = "Friedman", reference = "friedman1")
kisara_squid <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1RX1iUmEKsOUmuPZkn4SMA6zts9TTPz-gi0D6XFrY1RQ/edit#gid=934077877",
  case_study = "Kisara", reference = "kisara1")
kleisner_jfi <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1YM1wc6F0SgGIoBcTvSjVyLJquE6-6Chk1dPFoLiXuQY/edit#gid=1865610127",
  case_study = "Kleisner", reference = "kleisner1")
lau_png <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/18po7Pgq5Od6Q8yCnFdSfj_Gaxii34e-WsYwBFA3RTvo/edit#gid=1980661832",
  case_study = "Lau", reference = "lau1")
mason_iceland <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1ZPs5ow4wpZgj5eP26jvoMRgD9q6VT9DEdkJfqj7UrRE/edit#gid=2142384281",
  case_study = "Mason", reference = "mason1")
mills_lobster <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1HEPT54TsR1aTNNzDtZI59zU0eb-eJfH61ujErxAFtiI/edit#gid=553950990",
  case_study = "Mills", reference = "mills1")
pecl_lobster <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/11Z-SLlAS0f9Ug1Z8gLMnHkaN7hoZT1W0KMVLG8HtbXs/edit#gid=179679195",
  case_study = "Pecl", reference = "pecl1")
tokunaga_lobster <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1jgFZut693B14rdKbxprKtQcoh-mFlMzAktekjhvwf2Q/edit#gid=228934601",
  case_study = "Tokunaga", reference = "tokunaga1")
westfall_hms <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1DMx0Ux9Li1LhIe4ee5bbAYtJaw1vCezziRGyeERBthY/edit#gid=1818956842",
  case_study = "Westfall", reference = "westfall1")
zhao_moorea <- read_case_study(
  ss = "https://docs.google.com/spreadsheets/d/1-Muz76--fNv1qfdXZcBENP-agemTTySZAay9uAEO0HI/edit#gid=1980661832",
  case_study = "Zhao", reference = "zhao1")

# Case studies currently excluded from the analysis (uncomment to include):
# burden_sardine <- read_case_study(
#   ss = "https://docs.google.com/spreadsheets/d/1umFwY33PSTJYmlfSIZiIWmT2gIBQeYCw/edit?usp=drive_web&ouid=1082#97509896161547373&rtpof=true",
#   case_study = "Burden", reference = "burden1")
# golden_madagascar <- read_case_study(
#   ss = "https://docs.google.com/spreadsheets/d/1w99nN8C_Vd82j4Y23g2ZG_kuEh_Jc2HkW6lnLkhzeCU/edit#gid=1980661832",
#   case_study = "Golden", reference = "golden1")
# hollowed_bering_sea <- read_case_study(
#   ss = "https://docs.google.com/spreadsheets/d/1YOYZ9T7RR_8OkqQIWeZobhS-3yaEbKmuAVz16Nrrz9g/edit#gid=1980661832",
#   case_study = "Hollowed", reference = "hollowed1")
# schmidt_senegal <- read_case_study(
#   ss = "https://docs.google.com/spreadsheets/d/1bUY6yII5y0FZfDRJ5lxpA7iD2p3o__2A1xsWivxwKB0/edit#gid=2000123802",
#   case_study = "Schmidt", reference = "schmidt1")
# Merge the case-study datasets into one long table
# Combine the individual case-study tables (identical columns) into one
# long table.
# kisara_squid is read above but intentionally excluded: its columns do
# not bind with the others.
case_study_tables <- list(aguion_barnacle, dickey_collas_eur, eurich_clam,
                          free_crab, friedman_fiji, kleisner_jfi, lau_png,
                          mason_iceland, mills_lobster, pecl_lobster,
                          tokunaga_lobster, westfall_hms, zhao_moorea)
attributes_merged <- do.call(rbind, case_study_tables)
# Clean the merged dataset
# Clean and standardize the merged attribute table: shorten column names,
# keep only the columns used below, drop spacer/NA rows, and build numeric
# score and importance columns for the clustering and heatmap analyses.
attributes_merged_clean <- attributes_merged %>%
# Shorten the verbose survey column names
rename(dimension=dimensions,
domain=new_domain,
attribute=resilience_attribute,
score=score_1_4,
quality=information_quality,
importance=importance_of_attribute_in_study_system_high_medium_low,
work_as_described=does_the_mechanism_work_as_described_in_this_fishery_system) %>%
# Simplify to the columns used in the analyses below
select(dimension, domain, case_study, reference, attribute, score, quality, importance, work_as_described) %>%
# Remove spacer rows (rows without an attribute)
filter(!is.na(attribute)) %>%
# Drop rows with a truly missing score (a real NA, not the text "NA")
filter(!is.na(score)) %>%
# Scores were imported as text; the literal string "NA" was entered on
# purpose to mean "not applicable", so map it to "0" and keep "1"-"4"
# as-is. Any other value falls through to NA (case_when default).
mutate(official_score = case_when(
score == "1" ~ "1",
score == "2" ~ "2",
score == "3" ~ "3",
score == "4" ~ "4",
score == "NA" ~ "0"
)) %>%
mutate(official_score=as.numeric(official_score)) %>%
# Format dimension as a factor with standardized labels.
# NOTE(review): the "Social-economic" and "Goverance" spellings are relied
# on by the dimension filters further down — keep them in sync if renamed.
mutate(dimension=recode_factor(dimension,
"Ecological"="Ecological",
"Socio-economic"="Social-economic",
"Governance-management"="Goverance")) %>%
# Format data quality: "not relevant" becomes NA, then recode the lettered
# survey answers to short labels (E = worst ... A = best)
mutate(quality=ifelse(quality=="NA - Not relevant in this system", NA, quality),
quality=recode_factor(quality,
"E - No data/information; no basis for expert judgement"="No data", ### CF used "E - No data/information; no basis for expert judgement", but JE's was different
"D - not confident that the answer provided reflects the true state of the system"="Low",
"C - fairly confident that the answer provided reflects the true state of the system"="Fair",
"B - limited data/information and expert judgement"="Good",
"A - adequate and reliable data/information"="Excellent")) %>%
# Format importance as a Low < Medium < High factor
mutate(importance=stringr::str_to_sentence(importance),
importance=factor(importance, levels=c("Low", "Medium", "High"))) %>%
# Numeric version of importance (1-3) for the importance heatmap
mutate(numeric_importance = case_when(
importance == "Low" ~ "1",
importance == "Medium" ~ "2",
importance == "High" ~ "3")) %>%
mutate(numeric_importance=as.numeric(numeric_importance)) %>%
# Drop rows with no importance rating
filter(!is.na(numeric_importance))
# Plot dendrogram for case studies
# Prepare data for performing clustering analysis
# Wide table: one row per case study, one column per attribute; missing
# case/attribute combinations are filled with 0 (values_fill).
case_clustering <- attributes_merged_clean %>%
  select(case_study, attribute, official_score) %>%
  pivot_wider(names_from = attribute, values_from = official_score, values_fill = 0)

# Scale every attribute column. Index by position (-1 = everything after
# `case_study`) rather than a hard-coded 2:39 so the code keeps working
# when attributes are added or removed.
case_scaled <- case_clustering
case_scaled[, -1] <- scale(as.matrix(case_scaled[, -1]))

# Hierarchical clustering (complete linkage, Euclidean distance) on the
# scaled case-study profiles; case-study names become dendrogram labels.
case_matrix <- as.matrix(case_scaled[, -1])
rownames(case_matrix) <- case_scaled$case_study
case_dendro <- as.dendrogram(hclust(d = dist(x = case_matrix)))

# Plot the dendrogram
case_dendrogram_plot <- ggdendrogram(data = case_dendro, rotate = TRUE) +
  labs(title = "Hierarchical Clustering of Case Studies")
print(case_dendrogram_plot)

# Plot dendrogram for attributes
# Prepare data for performing clustering analysis
# Wide table: one row per attribute, one column per case study; missing
# attribute/case combinations are filled with 0 (values_fill).
attributes_clustering <- attributes_merged_clean %>%
  select(case_study, attribute, official_score) %>%
  pivot_wider(names_from = case_study, values_from = official_score, values_fill = 0)

# Scale every case-study column. Index by position (-1 = everything after
# `attribute`) rather than a hard-coded 2:13 — with 13 case studies merged,
# 2:13 covered only 12 columns and silently left the last one unscaled.
attributes_scaled <- attributes_clustering
attributes_scaled[, -1] <- scale(as.matrix(attributes_scaled[, -1]))

# Hierarchical clustering on Euclidean distances between attribute profiles
attributes_matrix <- as.matrix(attributes_scaled[, -1])
rownames(attributes_matrix) <- attributes_scaled$attribute
attributes_dendro <- as.dendrogram(hclust(d = dist(x = attributes_matrix)))

# Plot the dendrogram
attributes_dendrogram_plot <- ggdendrogram(data = attributes_dendro, rotate = TRUE) +
  labs(title = "Hierarchical Clustering of Attributes")
print(attributes_dendrogram_plot)

# Plot dendrograms by dimension
# Ecological hierarchical clustering
# Prepare data for performing clustering analysis
# Ecological-dimension attributes only, widened so each case study becomes
# a column (id columns: dimension, attribute); missing scores filled with 0.
ecological_clustering <- attributes_merged_clean %>%
  select(case_study, dimension, attribute, official_score) %>%
  filter(dimension == "Ecological") %>%
  pivot_wider(names_from = case_study, values_from = official_score, values_fill = 0)

# Scale every case-study column. Index by position (-c(1, 2) = everything
# after dimension + attribute) instead of a hard-coded 3:15 so the code
# keeps working when case studies are added or removed.
ecological_scaled <- ecological_clustering
ecological_scaled[, -c(1, 2)] <- scale(as.matrix(ecological_scaled[, -c(1, 2)]))

# Hierarchical clustering on Euclidean distances between attribute profiles
ecological_matrix <- as.matrix(ecological_scaled[, -c(1, 2)])
rownames(ecological_matrix) <- ecological_scaled$attribute
ecological_dendro <- as.dendrogram(hclust(d = dist(x = ecological_matrix)))

# Plot the dendrogram
ecological_dendrogram_plot <- ggdendrogram(data = ecological_dendro, rotate = TRUE) +
  labs(title = "Hierarchical Clustering of Attributes for Ecological Dimension")
print(ecological_dendrogram_plot)

# Socio-economic hierarchical clustering
# Prepare data for performing clustering analysis
# Socio-economic attributes only (the cleaned data labels this dimension
# "Social-economic"), widened so each case study becomes a column.
socio_clustering <- attributes_merged_clean %>%
  select(case_study, dimension, attribute, official_score) %>%
  filter(dimension == "Social-economic") %>%
  pivot_wider(names_from = case_study, values_from = official_score, values_fill = 0)

# Scale every case-study column. Index by position (-c(1, 2)) rather than
# a hard-coded 3:14 — with 13 case studies merged, 3:14 covered only 12
# columns and could silently leave the last one unscaled.
socio_scaled <- socio_clustering
socio_scaled[, -c(1, 2)] <- scale(as.matrix(socio_scaled[, -c(1, 2)]))

# Hierarchical clustering on Euclidean distances between attribute profiles
socio_matrix <- as.matrix(socio_scaled[, -c(1, 2)])
rownames(socio_matrix) <- socio_scaled$attribute
socio_dendro <- as.dendrogram(hclust(d = dist(x = socio_matrix)))

# Plot the dendrogram
socio_dendrogram_plot <- ggdendrogram(data = socio_dendro, rotate = TRUE) +
  labs(title = "Hierarchical Clustering of Attributes for Socio-Economic Dimension")
print(socio_dendrogram_plot)

# Governance hierarchical clustering
# Prepare data for performing clustering analysis
# Governance attributes only (the cleaned data labels this dimension
# "Goverance"), widened so each case study becomes a column.
governance_clustering <- attributes_merged_clean %>%
  select(case_study, dimension, attribute, official_score) %>%
  filter(dimension == "Goverance") %>%
  pivot_wider(names_from = case_study, values_from = official_score, values_fill = 0)

# Scale every case-study column. Index by position (-c(1, 2)) rather than
# a hard-coded 3:14 — with 13 case studies merged, 3:14 covered only 12
# columns and could silently leave the last one unscaled.
governance_scaled <- governance_clustering
governance_scaled[, -c(1, 2)] <- scale(as.matrix(governance_scaled[, -c(1, 2)]))

# Hierarchical clustering on Euclidean distances between attribute profiles
governance_matrix <- as.matrix(governance_scaled[, -c(1, 2)])
rownames(governance_matrix) <- governance_scaled$attribute
governance_dendro <- as.dendrogram(hclust(d = dist(x = governance_matrix)))

# Plot the dendrogram
governance_dendrogram_plot <- ggdendrogram(data = governance_dendro, rotate = TRUE) +
  labs(title = "Hierarchical Clustering of Attributes for Governance Dimension")
print(governance_dendrogram_plot)

# Plot attribute heatmaps
# Create theme
# Shared ggplot theme for the heatmaps: small text, angled x-axis labels,
# no grid lines. Apply AFTER any complete theme (e.g. theme_bw()).
my_theme <- theme(
  # Text sizes
  axis.text.x = element_text(size = 8, angle = 35, hjust = 1),
  axis.text.y = element_text(size = 8),
  axis.title = element_text(size = 9),
  legend.text = element_text(size = 8),
  legend.title = element_text(size = 9),
  strip.text = element_text(size = 9),
  plot.title = element_text(size = 10),
  # Gridlines: remove major/minor, keep panel background and axis lines
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_rect(),
  axis.line = element_line()
)
# Heatmap of Scores Across all Attributes
# Select the appropriate columns
# Reshape the wide case-by-attribute score table into long form for plotting
attributes_long <- melt(case_clustering, id = c("case_study"))

# Heatmap of scores across all attributes.
# Fix: the theme(axis.text.y = ...) tweak must come AFTER theme_bw() —
# theme_bw() is a complete theme and was silently discarding the size-6
# adjustment in the original ordering.
attributes_heatmap_plot <- ggplot(data = attributes_long,
                                  aes(x = case_study, y = variable, text = value)) +
  geom_tile(aes(fill = value)) +
  labs(title = "Heatmap of Attributes by Case study", x = "Case Studies", y = "Attributes") +
  scale_fill_viridis(discrete = FALSE) +
  theme_bw() + my_theme +
  theme(axis.text.y = element_text(size = 6)) +
  # Red reference lines on the attribute axis.
  # NOTE(review): character yintercept on a discrete axis relies on ggplot2
  # coercion — confirm the lines render at the intended positions.
  geom_hline(yintercept = "Population modularity",
             color = "red",
             linetype = "solid") +
  geom_hline(yintercept = "Agency",
             color = "red",
             linetype = "solid")
# Render an interactive version with the score shown on hover
ggplotly(attributes_heatmap_plot, tooltip = "text")
# Data wrangling to organize importance clustering
# Wide table of importance scores: one row per case study, one column per
# attribute; attributes a case study did not rate are filled with 0.
importance_clustering <- attributes_merged_clean %>%
  select(case_study, attribute, numeric_importance) %>%
  pivot_wider(names_from = attribute,
              values_from = numeric_importance,
              values_fill = 0)
# Heatmap of Importance Scores Across all Attributes
# Select the appropriate columns
# Reshape the wide importance table into long form for plotting
importance_long <- melt(importance_clustering, id = c("case_study"))

# Heatmap of importance scores across all attributes.
# Fix: the theme(axis.text.y = ...) tweak must come AFTER theme_bw() —
# theme_bw() is a complete theme and was silently discarding the size-6
# adjustment in the original ordering.
importance_heatmap_plot <- ggplot(data = importance_long,
                                  aes(x = case_study, y = variable, text = value)) +
  geom_tile(aes(fill = value)) +
  labs(title = "Heatmap of Importance Scores Across all Attributes", x = "Case Studies", y = "Attributes") +
  scale_fill_viridis(discrete = FALSE) +
  theme_bw() + my_theme +
  theme(axis.text.y = element_text(size = 6)) +
  # Red reference lines on the attribute axis.
  # NOTE(review): character yintercept on a discrete axis relies on ggplot2
  # coercion — confirm the lines render at the intended positions.
  geom_hline(yintercept = "Population modularity",
             color = "red",
             linetype = "solid") +
  geom_hline(yintercept = "Agency",
             color = "red",
             linetype = "solid")
importance_heatmap_plot

# Interactive version (disabled): ggplotly(importance_heatmap_plot, tooltip="text")
# Display the score and importance heatmaps side by side.
# NOTE(review): the `|` composition operator assumes the patchwork package
# is loaded — confirm it is attached alongside the other libraries.
attributes_heatmap_plot | importance_heatmap_plot

## Ecological Heatmap
# Prepare data for performing clustering analysis
#eco_heat_clustering <- attributes_merged_clean %>%
# select(case_study, dimension, attribute, official_score) %>%
# filter(dimension == "Ecological") %>%
# select(case_study, attribute, official_score) %>%
# Use `pivot_wider` function to decrease the number of rows and increase the number of columns
# Also do values_fill = 0 to convert NA values to 0
# pivot_wider(names_from = attribute, values_from = official_score, values_fill = 0)
# Select the appropriate columns
#eco_long <- melt(eco_heat_clustering, id = c("case_study"))
# Create heatmap
#eco_heatmap_plot <- ggplot(data = eco_long,
# aes(x = case_study, y = variable, text = value)) +
# geom_tile(aes(fill = value)) +
# labs(title = "Ecological Dimension Heatmap", x = "Case Studies", y = "Attributes") +
# scale_fill_viridis(discrete = FALSE) +
# theme(axis.text.y = element_text(size = 6)) +
# theme_bw() + my_theme
# Preview the heatmap
#ggplotly(eco_heatmap_plot, tooltip="text")
## Socio-Economic Heatmap
# Prepare data for performing clustering analysis
#socio_heat_clustering <- attributes_merged_clean %>%
# select(case_study, dimension, attribute, official_score) %>%
# filter(dimension == "Social-economic") %>%
# select(case_study, attribute, official_score) %>%
# Use `pivot_wider` function to decrease the number of rows and increase the number of columns
# Also do values_fill = 0 to convert NA values to 0
# pivot_wider(names_from = attribute, values_from = official_score, values_fill = 0)
# Select the appropriate columns
#socio_long <- melt(socio_heat_clustering, id = c("case_study"))
# Create heatmap
#socio_heatmap_plot <- ggplot(data = socio_long,
# aes(x = case_study, y = variable, text = value)) +
# geom_tile(aes(fill = value)) +
# labs(title = "Socio-Economic Dimension Heatmap", x = "Case Studies", y = "Attributes") +
# scale_fill_viridis(discrete = FALSE) +
# theme(axis.text.y = element_text(size = 6)) +
# theme_bw() + my_theme
# Preview the heatmap
#ggplotly(socio_heatmap_plot, tooltip="text")
## Governance Heatmap
# Prepare data for performing clustering analysis
#gov_heat_clustering <- attributes_merged_clean %>%
# select(case_study, dimension, attribute, official_score) %>%
# filter(dimension == "Goverance") %>%
# select(case_study, attribute, official_score) %>%
# Use `pivot_wider` function to decrease the number of rows and increase the number of columns
# Also do values_fill = 0 to convert NA values to 0
# pivot_wider(names_from = attribute, values_from = official_score, values_fill = 0)
# Select the appropriate columns
#gov_long <- melt(gov_heat_clustering, id = c("case_study"))
# Create heatmap
#gov_heatmap_plot <- ggplot(data = gov_long,
# aes(x = case_study, y = variable, text = value)) +
# geom_tile(aes(fill = value)) +
# labs(title = "Governance Dimension Heatmap", x = "Case Studies", y = "Attributes") +
# scale_fill_viridis(discrete = FALSE) +
# theme(axis.text.y = element_text(size = 6)) +
# theme_bw() +my_theme
# Preview the heatmap
#ggplotly(gov_heatmap_plot, tooltip="text")